import re
from lxml import etree,html

# Characters/tokens that are deleted outright from the extracted text:
# control whitespace, the non-breaking space character (U+00A0, which is what
# the HTML parser turns an ``&nbsp;`` entity into), and a literal "&nbsp" /
# "&nbsp;" that survived parsing (e.g. from double-escaped markup).
# NOTE: this must be an alternation, not a character class -- writing
# ``[...&nbsp]`` would delete every single '&', 'n', 'b', 's' and 'p'.
_PLAIN_TEXT_STRIP_RE = re.compile(r'(?:[\f\n\r\t\v\xa0]|&nbsp;?)+')

# Runs of two or more ordinary spaces act as field separators.
_PLAIN_TEXT_SPACE_RUN_RE = re.compile(r' {2,}')


def get_plain_text(doc):
    """Return the visible text of an HTML document as one compact string.

    The input is converted with ``str()`` and parsed as HTML; all text nodes
    are concatenated in document order. Control whitespace (form feed,
    newline, carriage return, tab, vertical tab) and non-breaking spaces are
    removed, every run of two or more spaces is replaced by a single comma,
    and leading/trailing commas are stripped.

    Args:
        doc: The HTML markup, or any object whose ``str()`` is HTML markup.

    Returns:
        The cleaned text. An empty string is returned when the parser
        produces no document tree.
    """
    tree = etree.HTML(str(doc))
    # The parser can hand back None when it finds no root element (e.g. for
    # empty input); treat that as "no text" rather than failing on .xpath.
    if tree is None:
        return ''

    plain_text = ''.join(tree.xpath('//text()'))
    plain_text = _PLAIN_TEXT_STRIP_RE.sub('', plain_text)
    plain_text = _PLAIN_TEXT_SPACE_RUN_RE.sub(',', plain_text)
    return plain_text.strip(',')
